Load Libraries

library(tidyverse)
## ── Attaching packages ────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.4
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0
## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(DT)

Exercise 1

# Read the tab-separated genotype file; the first row supplies column names.
SNPs <- read.table(file = "23andMe_complete.txt", sep = "\t", header = TRUE)

# Bar chart with one blue bar per chromosome; bar height is the row count.
p <- ggplot(data = SNPs, mapping = aes(x = chromosome)) +
  geom_bar(fill = "blue") +
  labs(
    title = "Total SNPs for each chromosome",
    x = "Chromosome",
    y = "Total number of SNPs"
  )
p

Exercise 2

# Fill palette keyed by genotype code: two-letter base codes are green,
# single-letter base codes pink, the D/I codes orange, and "--" red.
mycolor <- local({
  codes_by_colour <- list(
    green = c("AA", "AC", "AG", "AT", "CC", "CG", "CT", "GG", "GT", "TT"),
    pink = c("A", "C", "G", "T"),
    orange = c("D", "DD", "DI", "I", "II"),
    red = "--"
  )
  # Repeat each colour once per code, then name the colours by their codes.
  setNames(
    rep(names(codes_by_colour), times = lengths(codes_by_colour)),
    unlist(codes_by_colour, use.names = FALSE)
  )
})

# Stacked bar chart of SNP counts per chromosome, filled by genotype and
# outlined in black.
ggplot(data = SNPs, mapping = aes(x = chromosome, fill = genotype)) +
  geom_bar(colour = "black") +
  scale_fill_manual(values = mycolor) +
  labs(
    title = "Total SNPs count for each chromosome",
    x = "Chromosome",
    y = "SNP count"
  )

Exercise 3

# Write the dodged genotype-per-chromosome bar chart to a PNG device sized
# 6 x 6 inches at `ppi` pixels per inch.
ppi <- 300
# NOTE(review): the file name has no ".png" extension. It is kept unchanged
# because something outside this chunk may refer to "Exercise3_plot" -- confirm
# and rename both together if appropriate.
png("Exercise3_plot", width = 6 * ppi, height = 6 * ppi, res = ppi)
# Print explicitly: a ggplot object is only drawn when it is printed, and
# top-level auto-printing does not happen under source() or inside a function
# or loop, in which case nothing would be drawn on the open device.
print(
  ggplot(data = SNPs, aes(chromosome, fill = genotype)) +
    geom_bar(position = "dodge")
)
# Close the device so the file is flushed to disk.
dev.off()
## quartz_off_screen 
##                 2
Genotype counts per chromosome

Genotype counts per chromosome

Exercise 4

# Turn chromosome into an ordered factor: 1..22, then X, Y, MT.
SNPs$chromosome <- factor(
  SNPs$chromosome,
  levels = c(1:22, "X", "Y", "MT"),
  ordered = TRUE
)

# One panel per genotype (two columns of panels), bars counted per chromosome.
ggplot(data = SNPs, mapping = aes(x = chromosome, fill = genotype)) +
  geom_bar(position = "dodge") +
  facet_wrap(vars(genotype), ncol = 2) +
  labs(
    title = "Genotype for Each Type of Chromosome",
    x = "Chromosome",
    y = "Genotype Count"
  )

Exercise 5

# Make sure chromosome is an ordered factor (1..22, X, Y, MT) so this chunk
# does not rely on an earlier one having done it.
chromosome_order <- c(as.character(1:22), "X", "Y", "MT")
SNPs$chromosome <- ordered(SNPs$chromosome, levels = chromosome_order)

# Build the faceted, dodged bar chart, then hand it to plotly for an
# interactive rendering.
p <- ggplot(data = SNPs, mapping = aes(x = chromosome, fill = genotype)) +
  facet_wrap(vars(genotype), ncol = 2) +
  geom_bar(position = "dodge")
ggplotly(p)

Exercise 6

# Show the rows for chromosome "Y" as an interactive DT table.
# droplevels() discards factor levels that no longer occur after subsetting;
# subset() otherwise keeps every level from the full data set, which inflates
# the object handed to datatable().
# NOTE(review): presumably those unused levels are what trigger DT's
# "data is too big for client-side DataTables" warning -- confirm by
# re-running. droplevels() changes nothing if the columns are not factors.
chromosome_subset <- droplevels(subset(SNPs, chromosome == "Y"))
datatable(chromosome_subset)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html